# Author: Emilie Helen Wolf
# October 19, 2021
Data Dictionary
import numpy as np
np.random.seed(55)
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 200)
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")
sns.set_context("talk")
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
# ignore warnings that might arise from plots
import warnings
warnings.filterwarnings("ignore")
# import the csv as a pandas DataFrame
df = pd.read_csv('used_phone_data.csv')
df.head()
| brand_name | os | screen_size | 4g | 5g | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Honor | Android | 23.97 | yes | no | 13.0 | 5.0 | 64.0 | 3.0 | 3020.0 | 146.0 | 2020 | 127 | 111.62 | 86.96 |
| 1 | Honor | Android | 28.10 | yes | yes | 13.0 | 16.0 | 128.0 | 8.0 | 4300.0 | 213.0 | 2020 | 325 | 249.39 | 161.49 |
| 2 | Honor | Android | 24.29 | yes | yes | 13.0 | 8.0 | 128.0 | 8.0 | 4200.0 | 213.0 | 2020 | 162 | 359.47 | 268.55 |
| 3 | Honor | Android | 26.04 | yes | yes | 13.0 | 8.0 | 64.0 | 6.0 | 7250.0 | 480.0 | 2020 | 345 | 278.93 | 180.23 |
| 4 | Honor | Android | 15.72 | yes | no | 13.0 | 8.0 | 64.0 | 3.0 | 5000.0 | 185.0 | 2020 | 293 | 140.87 | 103.80 |
df.info()
# Shape: 3571 x 15
# Nulls: 6 columns have missing values
# Types: float64(9), int64(2), object(4)
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3571 entries, 0 to 3570 Data columns (total 15 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 brand_name 3571 non-null object 1 os 3571 non-null object 2 screen_size 3571 non-null float64 3 4g 3571 non-null object 4 5g 3571 non-null object 5 main_camera_mp 3391 non-null float64 6 selfie_camera_mp 3569 non-null float64 7 int_memory 3561 non-null float64 8 ram 3561 non-null float64 9 battery 3565 non-null float64 10 weight 3564 non-null float64 11 release_year 3571 non-null int64 12 days_used 3571 non-null int64 13 new_price 3571 non-null float64 14 used_price 3571 non-null float64 dtypes: float64(9), int64(2), object(4) memory usage: 418.6+ KB
# Count number of rows with at least one missing value
df.isnull().any(axis=1).sum()
# 203 rows with missing values
203
# Count how many values are missing in each column
df.isnull().sum()
# Look at the missingness
# 2 of the columns have the same number of missing values
brand_name 0 os 0 screen_size 0 4g 0 5g 0 main_camera_mp 180 selfie_camera_mp 2 int_memory 10 ram 10 battery 6 weight 7 release_year 0 days_used 0 new_price 0 used_price 0 dtype: int64
df[df['ram'].isnull()]
# These 10 Nokia phones from 2015 and 2016 are missing both ram and int_memory
| brand_name | os | screen_size | 4g | 5g | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2140 | Nokia | Others | 5.72 | no | no | 0.3 | 0.3 | NaN | NaN | NaN | 81.0 | 2016 | 815 | 29.21 | 8.65 |
| 2141 | Nokia | Others | 5.72 | no | no | 0.3 | 0.3 | NaN | NaN | NaN | 82.6 | 2016 | 1023 | 38.78 | 11.42 |
| 2142 | Nokia | Others | 6.35 | no | no | 2.0 | 2.0 | NaN | NaN | 1200.0 | 91.8 | 2015 | 867 | 68.01 | 20.46 |
| 2143 | Nokia | Others | 6.35 | no | no | 2.0 | 2.0 | NaN | NaN | 1200.0 | 91.8 | 2015 | 772 | 69.94 | 28.06 |
| 2144 | Nokia | Others | 5.72 | no | no | 2.0 | 2.0 | NaN | NaN | 1100.0 | 79.0 | 2015 | 1012 | 41.92 | 12.52 |
| 2145 | Nokia | Others | 5.72 | no | no | 2.0 | 2.0 | NaN | NaN | 1100.0 | 79.0 | 2015 | 798 | 40.48 | 16.13 |
| 2146 | Nokia | Others | 3.18 | no | no | 2.0 | 2.0 | NaN | NaN | 800.0 | 69.6 | 2015 | 1009 | 19.92 | 6.07 |
| 2147 | Nokia | Others | 3.18 | no | no | 2.0 | 2.0 | NaN | NaN | 800.0 | 69.8 | 2015 | 665 | 18.48 | 7.48 |
| 2148 | Nokia | Others | 5.72 | no | no | 0.3 | 2.0 | NaN | NaN | 1100.0 | 78.4 | 2015 | 559 | 29.33 | 14.76 |
| 2149 | Nokia | Others | 5.72 | no | no | 0.3 | 2.0 | NaN | NaN | 1100.0 | 78.6 | 2015 | 682 | 30.00 | 12.03 |
# What are the int_memory and ram values for other Nokia phones in 2014 and 2017?
(df.loc[(df['brand_name']=='Nokia') &
((df['release_year']==2014) | (df['release_year']==2017))]
.groupby('release_year')['int_memory','ram'].describe())
# I think it's safe to replace all 10 with 32.0 and 4.0
| int_memory | ram | |||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | |
| release_year | ||||||||||||||||
| 2014 | 22.0 | 28.363636 | 10.965511 | 16.0 | 16.0 | 32.0 | 32.0 | 64.0 | 22.0 | 4.000 | 0.000000 | 4.00 | 4.0 | 4.0 | 4.0 | 4.0 |
| 2017 | 10.0 | 43.200000 | 34.605073 | 16.0 | 20.0 | 32.0 | 56.0 | 128.0 | 10.0 | 3.625 | 1.185854 | 0.25 | 4.0 | 4.0 | 4.0 | 4.0 |
# df[df['main_camera_mp'].isnull()].sample(n=10)
df[df['main_camera_mp'].isnull()].groupby(['release_year','brand_name'])['release_year'].count()
# Phones with missing main_camera_mp values are mostly newer models
release_year brand_name
2017 OnePlus 2
2018 BlackBerry 4
Coolpad 1
OnePlus 3
Panasonic 2
2019 Asus 4
Coolpad 1
Infinix 4
Meizu 11
OnePlus 8
Realme 12
Sony 5
2020 Coolpad 1
Infinix 6
Lava 2
Meizu 4
Motorola 18
OnePlus 4
Oppo 20
Realme 24
Sony 2
Vivo 14
Xiaomi 24
ZTE 4
Name: release_year, dtype: int64
df[df['weight'].isnull()]
# These 7 XOLO phones from 2015 are missing weight
| brand_name | os | screen_size | 4g | 5g | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3113 | XOLO | Android | 12.70 | yes | no | 13.0 | 5.0 | 32.0 | 4.0 | 2400.0 | NaN | 2015 | 576 | 138.44 | 69.38 |
| 3114 | XOLO | Android | 13.49 | yes | no | 8.0 | 5.0 | 16.0 | 4.0 | 3200.0 | NaN | 2015 | 800 | 179.33 | 53.81 |
| 3115 | XOLO | Android | 12.70 | no | no | 8.0 | 2.0 | 32.0 | 4.0 | 2100.0 | NaN | 2015 | 878 | 59.21 | 17.77 |
| 3116 | XOLO | Android | 10.95 | no | no | 5.0 | 0.3 | 32.0 | 4.0 | 1800.0 | NaN | 2015 | 1036 | 81.13 | 24.32 |
| 3117 | XOLO | Android | 12.70 | no | no | 5.0 | 0.3 | 16.0 | 4.0 | 2500.0 | NaN | 2015 | 679 | 77.26 | 30.85 |
| 3118 | XOLO | Windows | 12.70 | no | no | 8.0 | 2.0 | 32.0 | 4.0 | 2200.0 | NaN | 2015 | 838 | 120.51 | 36.14 |
| 3119 | XOLO | Android | 12.70 | no | no | 8.0 | 5.0 | 32.0 | 4.0 | 2500.0 | NaN | 2015 | 1045 | 119.75 | 35.78 |
# What are the weights for other XOLO phones?
df.loc[(df['brand_name']=='XOLO')].groupby('release_year')['weight'].describe()
# Outside research says 2015 XOLO phones weight 125
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| release_year | ||||||||
| 2013 | 24.0 | 175.116667 | 84.293572 | 116.0 | 140.0 | 148.85 | 160.25 | 480.0 |
| 2014 | 18.0 | 118.888889 | 4.714045 | 100.0 | 120.0 | 120.00 | 120.00 | 120.0 |
| 2015 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
df[df['battery'].isnull()]
# No pattern for battery nulls
| brand_name | os | screen_size | 4g | 5g | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1901 | Meizu | Android | 13.49 | yes | no | 13.0 | 5.0 | 16.0 | 4.0 | NaN | 145.0 | 2014 | 986 | 129.49 | 38.67 |
| 1903 | Meizu | Android | 13.49 | yes | no | 20.7 | 5.0 | 16.0 | 4.0 | NaN | 158.0 | 2014 | 1043 | 399.67 | 119.86 |
| 1904 | Meizu | Android | 18.42 | yes | no | 20.7 | 2.0 | 16.0 | 4.0 | NaN | 147.0 | 2014 | 1007 | 339.04 | 101.65 |
| 2037 | Microsoft | Windows | 26.35 | no | no | 5.0 | 3.5 | 32.0 | 4.0 | NaN | 675.9 | 2013 | 931 | 331.21 | 99.49 |
| 2140 | Nokia | Others | 5.72 | no | no | 0.3 | 0.3 | NaN | NaN | NaN | 81.0 | 2016 | 815 | 29.21 | 8.65 |
| 2141 | Nokia | Others | 5.72 | no | no | 0.3 | 0.3 | NaN | NaN | NaN | 82.6 | 2016 | 1023 | 38.78 | 11.42 |
# Let's look at a summary of all 62 Meizu phones
df.loc[(df['brand_name']=='Meizu')].groupby('release_year')['battery'].describe()
# The battery for the 3 Meizu are probably between 2400-3150mAh
# Outside research says: 3100 (147g), 3350(158g), 3140(145g)
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| release_year | ||||||||
| 2013 | 1.0 | 2400.000000 | NaN | 2400.0 | 2400.00 | 2400.0 | 2400.00 | 2400.0 |
| 2014 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2015 | 6.0 | 3005.000000 | 250.000000 | 2500.0 | 3061.25 | 3095.0 | 3128.75 | 3150.0 |
| 2016 | 14.0 | 3047.857143 | 196.436663 | 2560.0 | 3060.00 | 3060.0 | 3060.00 | 3400.0 |
| 2017 | 7.0 | 3315.000000 | 359.525150 | 3000.0 | 3035.00 | 3235.0 | 3450.00 | 4000.0 |
| 2018 | 16.0 | 3188.750000 | 215.031006 | 3000.0 | 3000.00 | 3100.0 | 3315.00 | 3600.0 |
| 2019 | 11.0 | 3945.454545 | 332.757078 | 3600.0 | 3600.00 | 4000.0 | 4000.00 | 4500.0 |
| 2020 | 4.0 | 4500.000000 | 0.000000 | 4500.0 | 4500.00 | 4500.0 | 4500.00 | 4500.0 |
df[df['selfie_camera_mp'].isnull()]
# Are these two Google phones missing a selfie camera????
# Outside research says the 2018 Google phone (Pixel 3) has a selfie camera with 8MP
# The other one matches the Pixel 3 XL, which has an 8MP selfie camera as well
| brand_name | os | screen_size | 4g | 5g | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1131 | Android | 15.72 | yes | no | 12.2 | NaN | 64.0 | 4.0 | 3430.0 | 184.0 | 2018 | 475 | 959.08 | 479.52 | |
| 1132 | Android | 13.49 | yes | no | 12.2 | NaN | 64.0 | 4.0 | 2915.0 | 148.0 | 2018 | 424 | 849.87 | 425.16 |
# Look at all the objects and their unique values (discrete variables)
num_to_display = 35
for colname in df.dtypes[df.dtypes == 'object'].index:
val_counts = df[colname].value_counts(dropna=False)
print(val_counts[:num_to_display])
if len(val_counts) > num_to_display:
print(f'Only displaying first {num_to_display} of {len(val_counts)} values.')
print('\n')
Others 509 Samsung 364 Huawei 264 LG 212 Lenovo 172 ZTE 141 Xiaomi 134 Oppo 129 Asus 126 Alcatel 125 Nokia 121 Micromax 120 Honor 118 Vivo 117 HTC 110 Motorola 110 Sony 88 Meizu 62 Apple 59 Gionee 56 Acer 51 XOLO 49 Panasonic 47 Realme 41 Celkon 37 Lava 36 Spice 30 Karbonn 30 OnePlus 22 BlackBerry 22 Coolpad 22 Microsoft 22 Google 15 Infinix 10 Name: brand_name, dtype: int64 Android 3246 Others 202 Windows 67 iOS 56 Name: os, dtype: int64 yes 2359 no 1212 Name: 4g, dtype: int64 no 3419 yes 152 Name: 5g, dtype: int64
df.describe().T
# Look at the statistical summaries of the continuous variables
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| screen_size | 3571.0 | 14.803892 | 5.153092 | 2.700 | 12.700 | 13.49 | 16.510 | 46.36 |
| main_camera_mp | 3391.0 | 9.400454 | 4.818396 | 0.080 | 5.000 | 8.00 | 13.000 | 48.00 |
| selfie_camera_mp | 3569.0 | 6.547352 | 6.879359 | 0.300 | 2.000 | 5.00 | 8.000 | 32.00 |
| int_memory | 3561.0 | 54.532607 | 84.696246 | 0.005 | 16.000 | 32.00 | 64.000 | 1024.00 |
| ram | 3561.0 | 4.056962 | 1.391844 | 0.030 | 4.000 | 4.00 | 4.000 | 16.00 |
| battery | 3565.0 | 3067.225666 | 1364.206665 | 80.000 | 2100.000 | 3000.00 | 4000.000 | 12000.00 |
| weight | 3564.0 | 179.424285 | 90.280856 | 23.000 | 140.000 | 159.00 | 184.000 | 950.00 |
| release_year | 3571.0 | 2015.964996 | 2.291784 | 2013.000 | 2014.000 | 2016.00 | 2018.000 | 2020.00 |
| days_used | 3571.0 | 675.391487 | 248.640972 | 91.000 | 536.000 | 690.00 | 872.000 | 1094.00 |
| new_price | 3571.0 | 237.389037 | 197.545581 | 9.130 | 120.130 | 189.80 | 291.935 | 2560.20 |
| used_price | 3571.0 | 109.880277 | 121.501226 | 2.510 | 45.205 | 75.53 | 126.000 | 1916.54 |
for i in df.columns:
sns.histplot(data=df, x=i)
plt.show()
# Look at a histogram of each variable
df[df.duplicated()]
# Check for duplicates, none found
| brand_name | os | screen_size | 4g | 5g | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price |
|---|
Questions:
# Distribution of used prices
plt.figure(figsize=(8,5))
sns.violinplot(data=df, x='used_price');
# function to create labeled barplots
def labeled_barplot(data, feature, perc=True, n=None):
"""
Barplot with percentage at the top
data: dataframe
feature: dataframe column
perc: whether to display percentages instead of count (default is True)
n: displays the top n category levels (default is None, i.e., display all levels)
"""
total = len(data[feature]) # length of the column
count = data[feature].nunique()
if n is None:
plt.figure(figsize=(count + 1, 5))
else:
plt.figure(figsize=(n + 1, 5))
plt.xticks(rotation=90, fontsize=15)
ax = sns.countplot(
data=data,
x=feature,
palette="magma_r",
order=data[feature].value_counts().index[:n].sort_values(),
)
for p in ax.patches:
if perc == True:
label = "{:.1f}%".format(
100 * p.get_height() / total
) # percentage of each class of the category
else:
label = p.get_height() # count of each level of the category
x = p.get_x() + p.get_width() / 2 # width of the plot
y = p.get_height() # height of the plot
ax.annotate(
label,
(x, y),
ha="center",
va="center",
size=12,
xytext=(0, 5),
textcoords="offset points",
) # annotate the percentage
plt.show() # show the plot
df.columns
Index(['brand_name', 'os', 'screen_size', '4g', '5g', 'main_camera_mp',
'selfie_camera_mp', 'int_memory', 'ram', 'battery', 'weight',
'release_year', 'days_used', 'new_price', 'used_price'],
dtype='object')
# What percentage of the used phone market is dominated by Android devices?
labeled_barplot(df, "os")
# 90.9%
labeled_barplot(df, "brand_name")
# Percent of each brand
df['brand_name'].value_counts(normalize=True)
Others 0.142537 Samsung 0.101932 Huawei 0.073929 LG 0.059367 Lenovo 0.048166 ZTE 0.039485 Xiaomi 0.037525 Oppo 0.036124 Asus 0.035284 Alcatel 0.035004 Nokia 0.033884 Micromax 0.033604 Honor 0.033044 Vivo 0.032764 HTC 0.030804 Motorola 0.030804 Sony 0.024643 Meizu 0.017362 Apple 0.016522 Gionee 0.015682 Acer 0.014282 XOLO 0.013722 Panasonic 0.013162 Realme 0.011481 Celkon 0.010361 Lava 0.010081 Spice 0.008401 Karbonn 0.008401 OnePlus 0.006161 BlackBerry 0.006161 Coolpad 0.006161 Microsoft 0.006161 Google 0.004201 Infinix 0.002800 Name: brand_name, dtype: float64
# What's the max ram available per release year?
plt.figure(figsize=(11,8))
sns.barplot(data=df, x='release_year', y='ram', estimator=np.max, ci=None, palette='turbo')
plt.title("Maximum Phone RAM Available by Release Year")
plt.xlabel("Release Year")
plt.ylabel("RAM in GB")
print(df.groupby('release_year')['ram'].max())
release_year 2013 4.0 2014 4.0 2015 4.0 2016 4.0 2017 4.0 2018 8.0 2019 16.0 2020 16.0 Name: ram, dtype: float64
# How many phones of each brand are available?
plt.figure(figsize=(25, 10))
sns.countplot(df['brand_name'], palette='magma_r', order = df['brand_name'].value_counts().index)
plt.title("Count of Used Phones by Brand");
plt.xticks(rotation=90);
# RAM by brand
plt.figure(figsize=(25, 10))
sns.barplot(data=df,
x='brand_name',
y='ram',
palette='magma_r',
order=df.groupby(['brand_name'])['ram'].mean().sort_values(ascending=False).index
);
plt.xticks(rotation=90);
plt.title("Average RAM by brand");
# Comparison of Battery and Weight by Year
plt.figure(figsize=(15, 8))
sns.scatterplot(data=df,
x='weight', y='battery', hue='release_year',
palette='ch:start=.5,rot=-.75',
alpha=.5, legend='full'
)
plt.title("Battery compared to Weight and Year");
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
<matplotlib.legend.Legend at 0x1e78c40c610>
# Same scatterplot as above with screen_size added
plt.figure(figsize=(15, 10))
sns.scatterplot(data=df,
y='battery',
x='weight',
#palette='ch:start=.5,rot=-.75',
palette='coolwarm',
alpha=.5,
legend='brief',
size='screen_size',
hue = 'release_year',
sizes=(50,1000),
)
plt.title("Weight, Battery, Screen Size, and Release Year");
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.);
# What about new price compared to screen_size and main_camera_mp?
plt.figure(figsize=(15, 10))
sns.scatterplot(data=df,
y='new_price',
x='main_camera_mp',
#palette='ch:start=.5,rot=-.75',
palette='coolwarm',
alpha=.5,
legend='brief',
size='screen_size',
hue = 'release_year',
sizes=(50,1000),
)
plt.title("Title");
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.);
# Are there any light-weight phones with large batteries?
plt.figure(figsize=(13, 8))
sns.regplot(data=df[df['battery']>4500], y='weight', x='battery', marker='.')
plt.title("Weight of phones with batteries greater than 4500mAh");
# How many phones are available across different brands with a screen size larger than 6 inches?
# 6in = 15.24cm
plt.figure(figsize=(25, 10))
sns.countplot(data=df[df['screen_size']>15.24], x='brand_name', palette='magma_r',
order = df[df['screen_size']>15.24].groupby(['brand_name'])['brand_name'].count().sort_values(ascending=False).index)
plt.title("Count of Used Phones by Brand that have screens larger than 6 inches");
plt.xticks(rotation=90);
# What is the distribution of budget phones offering greater than 8MP selfie cameras across brands?
plt.figure(figsize=(25, 10))
sns.countplot(data=df[df['selfie_camera_mp']>8], x='brand_name', palette='magma_r',
order = df[df['selfie_camera_mp']>8].groupby(['brand_name'])['brand_name'].count().sort_values(ascending=False).index)
plt.title("Count of Used Phones by Brand that have greater than 8MP selfie cameras");
plt.xticks(rotation=90);
df[df['selfie_camera_mp']>8].groupby(['brand_name'])['brand_name'].count().sort_values(ascending=False)
brand_name Huawei 91 Vivo 78 Oppo 75 Xiaomi 65 Samsung 60 Honor 43 Others 34 LG 32 Motorola 26 Meizu 24 ZTE 20 HTC 20 OnePlus 18 Realme 18 Sony 14 Lenovo 14 Nokia 10 Asus 6 Infinix 4 Gionee 4 Coolpad 3 Micromax 2 Panasonic 2 BlackBerry 2 Acer 1 Name: brand_name, dtype: int64
# Which attributes are highly correlated with the used phone price?
plt.figure(figsize = (25,12))
sns.heatmap(df.corr(), annot=True, vmin=-1, vmax=1, cmap='Spectral');
print(abs(df.corr())['used_price'].sort_values(ascending=False))
used_price 1.000000 new_price 0.926087 ram 0.522659 selfie_camera_mp 0.497530 days_used 0.470760 release_year 0.456764 int_memory 0.404913 screen_size 0.385928 battery 0.363767 main_camera_mp 0.281065 weight 0.170654 Name: used_price, dtype: float64
# plt.figure(figsize = (25,15))
# sns.heatmap(abs(df.corr()), annot=True, vmin=0, vmax=1);
# How has the selfie camera resolution changed over the years?
plt.figure(figsize=(18, 7))
sns.pointplot(data=df,x='release_year',y='selfie_camera_mp', hue='os');
# What's the average used price of a phone for each release year?
plt.figure(figsize=(8,5))
sns.pointplot(data=df, x='release_year', y='used_price')
# The older the phone, the less it costs
<AxesSubplot:xlabel='release_year', ylabel='used_price'>
# Comparison of new vs used prices in relation to release_year
plt.figure(figsize=(15, 12))
sns.scatterplot(data=df, y='used_price', x='new_price',
palette='ch:start=.5,rot=-.75',
hue='release_year', legend='full', alpha=.5);
plt.title("New vs Old Prices");
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.);
# What is the price difference between new and used?
df['percent_drop'] = 1 - df['used_price'] / df['new_price']
plt.figure(figsize=(8,5))
sns.pointplot(data=df, x='release_year', y='percent_drop')
plt.ylabel("Discount")
df.drop(['percent_drop'], axis=1, inplace=True)
# Phones before 2018 are discounted 60% or more on average!
# By comparison, phones from last year are only marked down an average of 30%
# Days used by OS
plt.figure(figsize=(25,10))
sns.boxplot(data=df, x='os', y='days_used',
#palette='magma_r',
#order=df.groupby(['brand_name'])['days_used'].mean().sort_values(ascending=False).index
);
plt.title("Days Used by Operating System");
# Days used by Year
plt.figure(figsize=(25,10))
sns.boxplot(data=df, x='release_year', y='days_used',
);
plt.title("Days Used by Release Year");
# How do the main and selfie cameras compare?
plt.figure(figsize=(13,8))
sns.scatterplot(data=df, x='main_camera_mp',y='selfie_camera_mp', hue='release_year',
palette='coolwarm', legend='full',)
plt.title("Main Camera vs Selfie Camera")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.);
# For all phones with 5g, do they have 4g also?
print(df[df['5g']=='yes']['4g'].unique())
['yes']
# How many phones don't have 4g or 5g?
df[df['4g']=='no'].shape
(1212, 15)
sns.pairplot(data=df, hue='4g')
# all the orange dots represent "3G and Below"
<seaborn.axisgrid.PairGrid at 0x1e78ca9a040>
Duplicate value check - Missing value treatment - Outlier treatment - Feature engineering - Data preparation for modeling
df.isnull().sum()
brand_name 0 os 0 screen_size 0 4g 0 5g 0 main_camera_mp 180 selfie_camera_mp 2 int_memory 10 ram 10 battery 6 weight 7 release_year 0 days_used 0 new_price 0 used_price 0 dtype: int64
# What are the int_memory and ram values for other Nokia phones in 2014 and 2017?
(df.loc[(df['brand_name']=='Nokia') &
((df['release_year']==2014) | (df['release_year']==2017))]
.groupby('release_year')['int_memory','ram'].describe())
# I think it's safe to replace all 10 with 32.0 and 4.0
| int_memory | ram | |||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | |
| release_year | ||||||||||||||||
| 2014 | 22.0 | 28.363636 | 10.965511 | 16.0 | 16.0 | 32.0 | 32.0 | 64.0 | 22.0 | 4.000 | 0.000000 | 4.00 | 4.0 | 4.0 | 4.0 | 4.0 |
| 2017 | 10.0 | 43.200000 | 34.605073 | 16.0 | 20.0 | 32.0 | 56.0 | 128.0 | 10.0 | 3.625 | 1.185854 | 0.25 | 4.0 | 4.0 | 4.0 | 4.0 |
# What are the weights for other XOLO phones?
df.loc[df['brand_name']=='XOLO'].groupby('release_year')['weight'].describe()
# Outside research says 2015 XOLO phones weigh 125
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| release_year | ||||||||
| 2013 | 24.0 | 175.116667 | 84.293572 | 116.0 | 140.0 | 148.85 | 160.25 | 480.0 |
| 2014 | 18.0 | 118.888889 | 4.714045 | 100.0 | 120.0 | 120.00 | 120.00 | 120.0 |
| 2015 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
df[df['selfie_camera_mp'].isnull()]
# Are these two Google phones missing a selfie camera????
# Outside research says the 2018 Google phone (Pixel 3) has a selfie camera with 8MP
# The other one matches the Pixel 3 XL, which has an 8MP selfie camera as well
| brand_name | os | screen_size | 4g | 5g | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1131 | Android | 15.72 | yes | no | 12.2 | NaN | 64.0 | 4.0 | 3430.0 | 184.0 | 2018 | 475 | 959.08 | 479.52 | |
| 1132 | Android | 13.49 | yes | no | 12.2 | NaN | 64.0 | 4.0 | 2915.0 | 148.0 | 2018 | 424 | 849.87 | 425.16 |
df[df['battery'].isnull()]
# The battery for the 3 Meizu are probably between 2400-3150mAh
# Outside research says: 3100 (147g), 3350(158g), 3140(145g)
# It looks like a Microsoft computer somehow got in the dataset...?
| brand_name | os | screen_size | 4g | 5g | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1901 | Meizu | Android | 13.49 | yes | no | 13.0 | 5.0 | 16.0 | 4.0 | NaN | 145.0 | 2014 | 986 | 129.49 | 38.67 |
| 1903 | Meizu | Android | 13.49 | yes | no | 20.7 | 5.0 | 16.0 | 4.0 | NaN | 158.0 | 2014 | 1043 | 399.67 | 119.86 |
| 1904 | Meizu | Android | 18.42 | yes | no | 20.7 | 2.0 | 16.0 | 4.0 | NaN | 147.0 | 2014 | 1007 | 339.04 | 101.65 |
| 2037 | Microsoft | Windows | 26.35 | no | no | 5.0 | 3.5 | 32.0 | 4.0 | NaN | 675.9 | 2013 | 931 | 331.21 | 99.49 |
| 2140 | Nokia | Others | 5.72 | no | no | 0.3 | 0.3 | NaN | NaN | NaN | 81.0 | 2016 | 815 | 29.21 | 8.65 |
| 2141 | Nokia | Others | 5.72 | no | no | 0.3 | 0.3 | NaN | NaN | NaN | 82.6 | 2016 | 1023 | 38.78 | 11.42 |
# How many smart phones weigh more than a pound? (454g)
df.loc[df['weight']>454].describe()
# 118 phones!!!
| screen_size | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 118.000000 | 118.000000 | 118.000000 | 118.000000 | 118.000000 | 117.000000 | 118.000000 | 118.000000 | 118.000000 | 118.000000 | 118.000000 |
| mean | 25.905254 | 7.261864 | 3.319492 | 53.559322 | 4.008475 | 6658.952991 | 551.768644 | 2015.720339 | 698.500000 | 342.040424 | 152.202881 |
| std | 2.478659 | 3.543887 | 2.554007 | 130.980297 | 0.745307 | 2027.739652 | 97.326595 | 2.371071 | 246.042097 | 188.021090 | 117.907108 |
| min | 13.490000 | 0.300000 | 0.300000 | 16.000000 | 1.000000 | 1200.000000 | 456.000000 | 2013.000000 | 139.000000 | 99.960000 | 35.980000 |
| 25% | 25.560000 | 5.000000 | 1.300000 | 16.000000 | 4.000000 | 5770.500000 | 480.750000 | 2014.000000 | 549.000000 | 220.415000 | 78.005000 |
| 50% | 25.560000 | 8.000000 | 2.000000 | 32.000000 | 4.000000 | 7000.000000 | 510.000000 | 2015.000000 | 697.500000 | 298.550000 | 112.485000 |
| 75% | 26.040000 | 8.000000 | 5.000000 | 32.000000 | 4.000000 | 8000.000000 | 615.000000 | 2018.000000 | 901.250000 | 390.612500 | 186.972500 |
| max | 33.500000 | 16.000000 | 8.000000 | 1024.000000 | 6.000000 | 9720.000000 | 950.000000 | 2020.000000 | 1089.000000 | 1100.860000 | 749.840000 |
# Of those heavyweight phones, how many have missing values?
df.loc[df['weight']>454].isnull().any().sum()
1
# Which new cell phones costs more than $1800 / 1543€
df.loc[df['new_price']>1543]
# Some prices seem high, but they are reasonable enough considering the other specs
| brand_name | os | screen_size | 4g | 5g | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 45 | Huawei | Android | 23.65 | yes | yes | 13.0 | 16.0 | 512.0 | 12.0 | 4500.0 | 198.0 | 2019 | 289 | 1699.36 | 1185.73 |
| 204 | Xiaomi | Android | 32.39 | yes | yes | 12.0 | 20.0 | 512.0 | 12.0 | 4050.0 | 241.0 | 2019 | 478 | 2498.24 | 1248.99 |
| 270 | Huawei | Android | 23.65 | yes | yes | 13.0 | 16.0 | 512.0 | 12.0 | 4500.0 | 198.0 | 2019 | 488 | 1701.54 | 850.85 |
| 1313 | Huawei | Android | 20.32 | yes | no | 13.0 | 16.0 | 512.0 | 4.0 | 4500.0 | 295.0 | 2019 | 434 | 2300.87 | 1150.41 |
| 1325 | Huawei | Android | 21.43 | yes | no | 13.0 | 24.0 | 256.0 | 4.0 | 4200.0 | 187.0 | 2018 | 365 | 1600.29 | 1039.60 |
| 2448 | Samsung | Android | 18.26 | yes | no | 12.0 | 9.0 | 512.0 | 4.0 | 4380.0 | 263.0 | 2019 | 428 | 1751.18 | 875.62 |
| 3320 | Huawei | Android | 20.32 | yes | yes | 10.5 | 16.0 | 512.0 | 8.0 | 4500.0 | 300.0 | 2020 | 248 | 2560.20 | 1916.54 |
| 3463 | Huawei | Android | 20.32 | yes | yes | 10.5 | 16.0 | 512.0 | 8.0 | 4500.0 | 300.0 | 2020 | 129 | 2560.20 | 1916.34 |
df[df['battery'].isnull()]
# These 3 are bizarre so they will be dropped
| brand_name | os | screen_size | 4g | 5g | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1901 | Meizu | Android | 13.49 | yes | no | 13.0 | 5.0 | 16.0 | 4.0 | NaN | 145.0 | 2014 | 986 | 129.49 | 38.67 |
| 1903 | Meizu | Android | 13.49 | yes | no | 20.7 | 5.0 | 16.0 | 4.0 | NaN | 158.0 | 2014 | 1043 | 399.67 | 119.86 |
| 1904 | Meizu | Android | 18.42 | yes | no | 20.7 | 2.0 | 16.0 | 4.0 | NaN | 147.0 | 2014 | 1007 | 339.04 | 101.65 |
| 2037 | Microsoft | Windows | 26.35 | no | no | 5.0 | 3.5 | 32.0 | 4.0 | NaN | 675.9 | 2013 | 931 | 331.21 | 99.49 |
| 2140 | Nokia | Others | 5.72 | no | no | 0.3 | 0.3 | NaN | NaN | NaN | 81.0 | 2016 | 815 | 29.21 | 8.65 |
| 2141 | Nokia | Others | 5.72 | no | no | 0.3 | 0.3 | NaN | NaN | NaN | 82.6 | 2016 | 1023 | 38.78 | 11.42 |
# Is there a pattern of missingness for the missing main_camera_mp values?
df[df['main_camera_mp'].isnull()].sort_values(by='new_price')
# We don't necesarrily want to drop these rows completely because most of them are newer models and
# the dataset is predominately older data. Maybe we can just impute them.
| brand_name | os | screen_size | 4g | 5g | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 145 | Realme | Android | 15.40 | yes | no | NaN | 5.0 | 32.0 | 3.0 | 4000.0 | 166.0 | 2020 | 167 | 40.0800 | 30.34 |
| 370 | Realme | Android | 15.40 | yes | no | NaN | 5.0 | 32.0 | 3.0 | 4000.0 | 166.0 | 2020 | 247 | 40.8500 | 30.68 |
| 65 | Lava | Android | 23.97 | yes | no | NaN | 5.0 | 32.0 | 2.0 | 3200.0 | 150.0 | 2020 | 356 | 79.9200 | 51.87 |
| 290 | Lava | Android | 23.97 | yes | no | NaN | 5.0 | 32.0 | 2.0 | 3200.0 | 150.0 | 2020 | 359 | 79.9400 | 51.77 |
| 200 | Xiaomi | Android | 18.73 | yes | no | NaN | 8.0 | 32.0 | 2.0 | 5000.0 | 188.0 | 2020 | 321 | 80.8200 | 52.13 |
| 3341 | Motorola | Android | 15.40 | yes | no | NaN | 5.0 | 32.0 | 2.0 | 3000.0 | 160.0 | 2020 | 208 | 82.3225 | 60.90 |
| 3484 | Motorola | Android | 15.40 | yes | no | NaN | 5.0 | 32.0 | 2.0 | 3000.0 | 160.0 | 2020 | 249 | 82.3225 | 60.74 |
| 287 | Infinix | Android | 16.19 | yes | no | NaN | 8.0 | 32.0 | 2.0 | 5000.0 | 185.0 | 2020 | 329 | 88.8800 | 57.95 |
| 199 | Xiaomi | Android | 18.73 | yes | no | NaN | 8.0 | 32.0 | 2.0 | 5000.0 | 188.0 | 2020 | 265 | 89.6000 | 68.99 |
| 62 | Infinix | Android | 16.19 | yes | no | NaN | 8.0 | 32.0 | 2.0 | 5000.0 | 185.0 | 2020 | 256 | 90.5100 | 66.43 |
| 60 | Infinix | Android | 28.26 | yes | no | NaN | 8.0 | 32.0 | 2.0 | 6000.0 | 209.0 | 2020 | 245 | 99.2500 | 72.93 |
| 369 | Realme | Android | 16.03 | yes | no | NaN | 5.0 | 32.0 | 3.0 | 5000.0 | 195.0 | 2020 | 321 | 99.5300 | 65.02 |
| 371 | Realme | Android | 15.40 | yes | no | NaN | 5.0 | 32.0 | 2.0 | 4000.0 | 166.0 | 2019 | 450 | 99.5900 | 49.75 |
| 63 | Infinix | Android | 16.19 | yes | no | NaN | 16.0 | 32.0 | 3.0 | 4000.0 | 178.0 | 2019 | 316 | 99.7000 | 64.51 |
| 285 | Infinix | Android | 28.26 | yes | no | NaN | 8.0 | 32.0 | 2.0 | 6000.0 | 209.0 | 2020 | 320 | 100.0200 | 65.22 |
| 288 | Infinix | Android | 16.19 | yes | no | NaN | 16.0 | 32.0 | 3.0 | 4000.0 | 178.0 | 2019 | 356 | 100.0800 | 63.94 |
| 144 | Realme | Android | 16.03 | yes | no | NaN | 5.0 | 32.0 | 3.0 | 5000.0 | 195.0 | 2020 | 111 | 100.1400 | 76.16 |
| 146 | Realme | Android | 15.40 | yes | no | NaN | 5.0 | 32.0 | 2.0 | 4000.0 | 166.0 | 2019 | 399 | 100.6200 | 65.48 |
| 202 | Xiaomi | Others | 8.73 | no | no | NaN | 16.0 | 64.0 | 6.0 | 420.0 | 208.0 | 2020 | 153 | 101.9500 | 75.34 |
| 3522 | Realme | Android | 16.03 | yes | no | NaN | 5.0 | 32.0 | 2.0 | 5000.0 | 195.0 | 2020 | 279 | 106.1990 | 66.83 |
| 3379 | Realme | Android | 16.03 | yes | no | NaN | 5.0 | 32.0 | 2.0 | 5000.0 | 195.0 | 2020 | 357 | 106.1990 | 67.78 |
| 286 | Infinix | Android | 16.19 | yes | no | NaN | 8.0 | 64.0 | 4.0 | 5000.0 | 185.0 | 2020 | 173 | 110.1800 | 82.67 |
| 3407 | Xiaomi | Android | 23.65 | yes | no | NaN | 8.0 | 32.0 | 3.0 | 5020.0 | 198.0 | 2020 | 340 | 110.5000 | 72.53 |
| 61 | Infinix | Android | 16.19 | yes | no | NaN | 8.0 | 64.0 | 4.0 | 5000.0 | 185.0 | 2020 | 173 | 111.2500 | 85.94 |
| 3524 | Realme | Android | 23.50 | yes | no | NaN | 8.0 | 32.0 | 3.0 | 5000.0 | 195.0 | 2020 | 113 | 118.9915 | 91.43 |
| 3381 | Realme | Android | 23.50 | yes | no | NaN | 8.0 | 32.0 | 3.0 | 5000.0 | 195.0 | 2020 | 278 | 118.9915 | 72.50 |
| 130 | Oppo | Android | 15.56 | yes | no | NaN | 8.0 | 64.0 | 3.0 | 4230.0 | 168.0 | 2020 | 273 | 119.4000 | 82.49 |
| 355 | Oppo | Android | 15.56 | yes | no | NaN | 8.0 | 64.0 | 3.0 | 4230.0 | 168.0 | 2020 | 267 | 119.8100 | 80.64 |
| 3483 | Motorola | Android | 16.03 | yes | no | NaN | 8.0 | 64.0 | 4.0 | 5000.0 | 200.0 | 2020 | 277 | 122.4000 | 84.73 |
| 3340 | Motorola | Android | 16.03 | yes | no | NaN | 8.0 | 64.0 | 4.0 | 5000.0 | 200.0 | 2020 | 352 | 122.4000 | 80.04 |
| 3339 | Motorola | Android | 15.56 | yes | no | NaN | 5.0 | 32.0 | 2.0 | 3550.0 | 185.0 | 2020 | 174 | 127.4915 | 95.61 |
| 3482 | Motorola | Android | 15.56 | yes | no | NaN | 5.0 | 32.0 | 2.0 | 3550.0 | 185.0 | 2020 | 230 | 127.4915 | 93.86 |
| 289 | Infinix | Android | 15.56 | yes | no | NaN | 16.0 | 32.0 | 2.0 | 4000.0 | 165.0 | 2019 | 497 | 129.8100 | 64.91 |
| 64 | Infinix | Android | 15.56 | yes | no | NaN | 16.0 | 32.0 | 2.0 | 4000.0 | 165.0 | 2019 | 468 | 130.4900 | 65.29 |
| 3411 | Xiaomi | Android | 23.65 | yes | no | NaN | 13.0 | 64.0 | 3.0 | 5020.0 | 199.0 | 2020 | 208 | 136.0000 | 104.57 |
| 3376 | Realme | Android | 16.03 | yes | no | NaN | 16.0 | 64.0 | 3.0 | 5000.0 | 199.0 | 2020 | 320 | 139.5190 | 90.68 |
| 3519 | Realme | Android | 16.03 | yes | no | NaN | 16.0 | 64.0 | 3.0 | 5000.0 | 199.0 | 2020 | 206 | 139.5190 | 101.97 |
| 3529 | Realme | Android | 15.72 | yes | no | NaN | 25.0 | 64.0 | 4.0 | 4045.0 | 172.0 | 2019 | 288 | 143.5650 | 107.38 |
| 3386 | Realme | Android | 15.72 | yes | no | NaN | 25.0 | 64.0 | 4.0 | 4045.0 | 172.0 | 2019 | 277 | 143.5650 | 98.09 |
| 3525 | Realme | Android | 16.03 | yes | no | NaN | 13.0 | 64.0 | 4.0 | 5000.0 | 198.0 | 2019 | 293 | 143.6500 | 105.42 |
| 3382 | Realme | Android | 16.03 | yes | no | NaN | 13.0 | 64.0 | 4.0 | 5000.0 | 198.0 | 2019 | 299 | 143.6500 | 95.52 |
| 3344 | Motorola | Android | 15.88 | yes | no | NaN | 16.0 | 64.0 | 4.0 | 5000.0 | 197.0 | 2020 | 202 | 147.9510 | 109.73 |
| 3487 | Motorola | Android | 15.88 | yes | no | NaN | 16.0 | 64.0 | 4.0 | 5000.0 | 197.0 | 2020 | 364 | 147.9510 | 94.40 |
| 1020 | Coolpad | Android | 15.56 | yes | no | NaN | 8.0 | 64.0 | 4.0 | 4000.0 | 177.0 | 2018 | 655 | 149.4700 | 59.75 |
| 368 | Realme | Android | 16.03 | yes | no | NaN | 16.0 | 128.0 | 4.0 | 5000.0 | 199.0 | 2020 | 362 | 150.0200 | 98.10 |
| 143 | Realme | Android | 16.03 | yes | no | NaN | 16.0 | 128.0 | 4.0 | 5000.0 | 199.0 | 2020 | 338 | 150.5300 | 98.59 |
| 3417 | Xiaomi | Android | 25.88 | yes | no | NaN | 16.0 | 64.0 | 4.0 | 5020.0 | 209.0 | 2020 | 96 | 157.4710 | 121.89 |
| 160 | Vivo | Android | 23.65 | yes | no | NaN | 16.0 | 64.0 | 4.0 | 5000.0 | 204.1 | 2020 | 246 | 159.0000 | 118.22 |
| 3385 | Realme | Android | 15.72 | yes | no | NaN | 16.0 | 64.0 | 4.0 | 4035.0 | 184.0 | 2019 | 538 | 159.8850 | 79.92 |
| 3528 | Realme | Android | 15.72 | yes | no | NaN | 16.0 | 64.0 | 4.0 | 4035.0 | 184.0 | 2019 | 433 | 159.8850 | 80.00 |
| 418 | Coolpad | Android | 20.96 | yes | no | NaN | 13.0 | 32.0 | 3.0 | 4000.0 | 170.0 | 2019 | 342 | 161.6100 | 105.36 |
| 3342 | Motorola | Android | 15.88 | yes | no | NaN | 16.0 | 128.0 | 4.0 | 4000.0 | 192.0 | 2020 | 186 | 165.7500 | 121.79 |
| 3485 | Motorola | Android | 15.88 | yes | no | NaN | 16.0 | 128.0 | 4.0 | 4000.0 | 192.0 | 2020 | 176 | 165.7500 | 124.07 |
| 3338 | Motorola | Android | 15.88 | yes | no | NaN | 8.0 | 32.0 | 3.0 | 4000.0 | 189.4 | 2020 | 349 | 169.9915 | 111.11 |
| 3481 | Motorola | Android | 15.88 | yes | no | NaN | 8.0 | 32.0 | 3.0 | 4000.0 | 189.4 | 2020 | 101 | 169.9915 | 125.64 |
| 3408 | Xiaomi | Android | 23.65 | yes | no | NaN | 13.0 | 128.0 | 4.0 | 5020.0 | 199.0 | 2020 | 354 | 169.9915 | 111.07 |
| 3384 | Realme | Android | 15.88 | yes | no | NaN | 16.0 | 64.0 | 4.0 | 4000.0 | 183.0 | 2019 | 264 | 174.1650 | 132.17 |
| 3527 | Realme | Android | 15.88 | yes | no | NaN | 16.0 | 64.0 | 4.0 | 4000.0 | 183.0 | 2019 | 503 | 174.1650 | 86.88 |
| 326 | Meizu | Android | 16.03 | yes | no | NaN | 8.0 | 32.0 | 2.0 | 4000.0 | 184.0 | 2019 | 198 | 179.7600 | 133.41 |
| 101 | Meizu | Android | 16.03 | yes | no | NaN | 8.0 | 32.0 | 2.0 | 4000.0 | 184.0 | 2019 | 190 | 180.4000 | 137.75 |
| 198 | Xiaomi | Android | 25.88 | yes | no | NaN | 32.0 | 64.0 | 6.0 | 5020.0 | 209.0 | 2020 | 256 | 181.0400 | 131.59 |
| 163 | Vivo | Android | 22.70 | yes | no | NaN | 8.0 | 128.0 | 4.0 | 5000.0 | 197.0 | 2020 | 118 | 190.5000 | 142.28 |
| 3410 | Xiaomi | Android | 25.88 | yes | no | NaN | 16.0 | 64.0 | 6.0 | 5020.0 | 209.0 | 2020 | 284 | 192.9500 | 130.25 |
| 2782 | Sony | Android | 13.81 | yes | no | NaN | 8.0 | 32.0 | 4.0 | 3300.0 | 156.0 | 2019 | 195 | 198.1500 | 149.10 |
| 2291 | Panasonic | Android | 18.26 | yes | no | NaN | 8.0 | 64.0 | 4.0 | 4000.0 | 169.0 | 2018 | 487 | 199.0800 | 99.55 |
| 201 | Xiaomi | Android | 25.88 | yes | no | NaN | 16.0 | 64.0 | 6.0 | 4500.0 | 208.0 | 2020 | 169 | 199.4400 | 151.23 |
| 1858 | Meizu | Android | 15.56 | yes | no | NaN | 20.0 | 64.0 | 4.0 | 4000.0 | 170.0 | 2019 | 295 | 199.5600 | 138.09 |
| 367 | Realme | Android | 16.03 | yes | no | NaN | 16.0 | 64.0 | 4.0 | 4300.0 | 191.0 | 2020 | 97 | 200.3200 | 144.67 |
| 195 | Xiaomi | Android | 24.29 | yes | yes | NaN | 16.0 | 64.0 | 6.0 | 4520.0 | 205.0 | 2020 | 301 | 200.6000 | 128.76 |
| 142 | Realme | Android | 16.03 | yes | no | NaN | 16.0 | 64.0 | 4.0 | 4300.0 | 191.0 | 2020 | 289 | 200.7500 | 145.74 |
| 3380 | Realme | Android | 24.29 | yes | yes | NaN | 8.0 | 64.0 | 6.0 | 4200.0 | 202.0 | 2020 | 201 | 203.9915 | 152.38 |
| 3523 | Realme | Android | 24.29 | yes | yes | NaN | 8.0 | 64.0 | 6.0 | 4200.0 | 202.0 | 2020 | 282 | 203.9915 | 135.78 |
| 3520 | Realme | Android | 16.19 | yes | no | NaN | 8.0 | 64.0 | 6.0 | 4300.0 | 202.0 | 2020 | 231 | 211.6500 | 158.86 |
| 3377 | Realme | Android | 16.19 | yes | no | NaN | 8.0 | 64.0 | 6.0 | 4300.0 | 202.0 | 2020 | 347 | 211.6500 | 137.91 |
| 3397 | Vivo | Android | 23.65 | yes | no | NaN | 16.0 | 128.0 | 8.0 | 5000.0 | 197.0 | 2020 | 108 | 212.4830 | 157.19 |
| 3343 | Motorola | Android | 15.88 | yes | no | NaN | 16.0 | 64.0 | 4.0 | 5000.0 | 199.0 | 2020 | 97 | 212.4915 | 157.45 |
| 3486 | Motorola | Android | 15.88 | yes | no | NaN | 16.0 | 64.0 | 4.0 | 5000.0 | 199.0 | 2020 | 156 | 212.4915 | 159.42 |
| 103 | Meizu | Android | 15.56 | yes | no | NaN | 16.0 | 64.0 | 6.0 | 4000.0 | 165.0 | 2019 | 517 | 218.9100 | 109.51 |
| 328 | Meizu | Android | 15.56 | yes | no | NaN | 16.0 | 64.0 | 6.0 | 4000.0 | 165.0 | 2019 | 499 | 220.4400 | 110.20 |
| 3412 | Xiaomi | Android | 22.70 | yes | no | NaN | 16.0 | 64.0 | 6.0 | 5260.0 | 204.0 | 2020 | 200 | 226.0915 | 168.83 |
| 3375 | Realme | Android | 24.29 | yes | yes | NaN | 8.0 | 128.0 | 6.0 | 4200.0 | 194.0 | 2020 | 232 | 234.6935 | 174.56 |
| 3518 | Realme | Android | 24.29 | yes | yes | NaN | 8.0 | 128.0 | 6.0 | 4200.0 | 194.0 | 2020 | 258 | 234.6935 | 165.37 |
| 3395 | Vivo | Android | 23.65 | yes | yes | NaN | 16.0 | 128.0 | 8.0 | 4500.0 | 190.0 | 2020 | 197 | 246.4830 | 185.34 |
| 3414 | Xiaomi | Android | 24.29 | yes | yes | NaN | 16.0 | 64.0 | 6.0 | 4160.0 | 192.0 | 2020 | 351 | 246.6785 | 159.48 |
| 196 | Xiaomi | Android | 25.88 | yes | yes | NaN | 16.0 | 128.0 | 6.0 | 4800.0 | 208.0 | 2020 | 319 | 250.4200 | 163.38 |
| 325 | Meizu | Android | 16.03 | yes | no | NaN | 16.0 | 128.0 | 6.0 | 4500.0 | 183.0 | 2019 | 348 | 251.1900 | 164.23 |
| 100 | Meizu | Android | 16.03 | yes | no | NaN | 16.0 | 128.0 | 6.0 | 4500.0 | 183.0 | 2019 | 449 | 251.2200 | 125.57 |
| 3398 | Vivo | Android | 24.29 | yes | yes | NaN | 16.0 | 128.0 | 6.0 | 5000.0 | 201.0 | 2020 | 135 | 252.8665 | 188.48 |
| 197 | Xiaomi | Android | 25.88 | yes | yes | NaN | 16.0 | 128.0 | 6.0 | 4500.0 | 208.0 | 2020 | 184 | 260.8100 | 196.45 |
| 166 | Vivo | Android | 22.22 | yes | no | NaN | 32.0 | 128.0 | 8.0 | 4500.0 | 176.0 | 2020 | 199 | 270.2100 | 204.63 |
| 3388 | Sony | Android | 15.24 | yes | no | NaN | 8.0 | 128.0 | 4.0 | 3600.0 | 151.0 | 2020 | 349 | 271.1500 | 174.88 |
| 194 | Xiaomi | Android | 24.29 | yes | yes | NaN | 20.0 | 128.0 | 8.0 | 4520.0 | 208.0 | 2020 | 318 | 288.3700 | 188.10 |
| 353 | Oppo | Android | 24.29 | yes | yes | NaN | 32.0 | 128.0 | 6.0 | 4000.0 | 184.0 | 2020 | 122 | 289.4600 | 218.71 |
| 128 | Oppo | Android | 24.29 | yes | yes | NaN | 32.0 | 128.0 | 6.0 | 4000.0 | 184.0 | 2020 | 100 | 290.8600 | 215.75 |
| 3396 | Vivo | Android | 24.29 | yes | yes | NaN | 16.0 | 128.0 | 6.0 | 4500.0 | 195.0 | 2020 | 352 | 293.2330 | 189.81 |
| 3413 | Xiaomi | Android | 24.29 | yes | yes | NaN | 16.0 | 64.0 | 6.0 | 4160.0 | 192.0 | 2020 | 156 | 296.6415 | 222.63 |
| 335 | Motorola | Android | 16.03 | yes | no | NaN | 16.0 | 128.0 | 6.0 | 5000.0 | 210.0 | 2020 | 92 | 298.0600 | 222.31 |
| 736 | Asus | Android | 19.37 | yes | no | NaN | 8.0 | 32.0 | 4.0 | 4000.0 | 165.0 | 2019 | 524 | 299.2700 | 149.68 |
| 162 | Vivo | Android | 21.27 | yes | no | NaN | 16.0 | 128.0 | 8.0 | 4500.0 | 190.2 | 2020 | 300 | 299.7000 | 195.05 |
| 110 | Motorola | Android | 16.03 | yes | no | NaN | 16.0 | 128.0 | 6.0 | 5000.0 | 210.0 | 2020 | 242 | 300.3200 | 222.49 |
| 863 | BlackBerry | Android | 28.42 | yes | no | NaN | 16.0 | 64.0 | 4.0 | 4000.0 | 170.0 | 2018 | 383 | 301.6600 | 194.68 |
| 737 | Asus | Android | 19.37 | yes | no | NaN | 8.0 | 64.0 | 4.0 | 4000.0 | 165.0 | 2019 | 397 | 310.5300 | 201.16 |
| 211 | ZTE | Android | 25.56 | yes | yes | NaN | 12.0 | 128.0 | 6.0 | 5100.0 | 210.0 | 2020 | 278 | 311.1100 | 209.72 |
| 159 | Vivo | Android | 22.22 | yes | no | NaN | 32.0 | 128.0 | 8.0 | 4500.0 | 176.0 | 2020 | 343 | 319.4800 | 206.54 |
| 861 | BlackBerry | Android | 10.95 | yes | no | NaN | 8.0 | 32.0 | 4.0 | 3000.0 | 156.0 | 2018 | 552 | 331.0600 | 165.55 |
| 3365 | Oppo | Android | 15.88 | yes | yes | NaN | 32.0 | 128.0 | 8.0 | 4025.0 | 180.0 | 2020 | 306 | 339.1500 | 219.57 |
| 3508 | Oppo | Android | 15.88 | yes | yes | NaN | 32.0 | 128.0 | 8.0 | 4025.0 | 180.0 | 2020 | 326 | 339.1500 | 218.95 |
| 102 | Meizu | Android | 15.56 | yes | no | NaN | 20.0 | 128.0 | 6.0 | 3600.0 | 166.0 | 2019 | 459 | 340.5900 | 170.34 |
| 327 | Meizu | Android | 15.56 | yes | no | NaN | 20.0 | 128.0 | 6.0 | 3600.0 | 166.0 | 2019 | 405 | 341.1600 | 170.70 |
| 862 | BlackBerry | Android | 28.42 | yes | no | NaN | 16.0 | 64.0 | 4.0 | 4000.0 | 170.0 | 2018 | 629 | 348.2900 | 139.37 |
| 2292 | Panasonic | Android | 18.10 | yes | no | NaN | 16.0 | 128.0 | 4.0 | 3000.0 | 195.0 | 2018 | 717 | 349.4000 | 139.60 |
| 164 | Vivo | Android | 24.29 | yes | yes | NaN | 16.0 | 128.0 | 6.0 | 4500.0 | 198.1 | 2020 | 312 | 349.8300 | 227.88 |
| 2781 | Sony | Android | 15.24 | yes | no | NaN | 8.0 | 64.0 | 4.0 | 2870.0 | 162.0 | 2019 | 235 | 351.6300 | 262.70 |
| 3409 | Xiaomi | Android | 25.88 | yes | yes | NaN | 16.0 | 128.0 | 6.0 | 4700.0 | 219.0 | 2020 | 128 | 358.7000 | 272.45 |
| 417 | Coolpad | Android | 23.65 | yes | yes | NaN | 16.0 | 64.0 | 4.0 | 4000.0 | 195.0 | 2020 | 252 | 359.7700 | 258.53 |
| 3526 | Realme | Android | 16.03 | yes | no | NaN | 16.0 | 64.0 | 6.0 | 4000.0 | 199.0 | 2019 | 181 | 365.4150 | 273.29 |
| 3383 | Realme | Android | 16.03 | yes | no | NaN | 16.0 | 64.0 | 6.0 | 4000.0 | 199.0 | 2019 | 475 | 365.4150 | 182.74 |
| 127 | Oppo | Android | 22.07 | yes | yes | NaN | 32.0 | 128.0 | 8.0 | 4000.0 | 183.0 | 2020 | 271 | 368.8000 | 260.19 |
| 352 | Oppo | Android | 22.07 | yes | yes | NaN | 32.0 | 128.0 | 8.0 | 4000.0 | 183.0 | 2020 | 281 | 369.3000 | 257.34 |
| 3363 | OnePlus | Android | 23.97 | yes | no | NaN | 16.0 | 128.0 | 8.0 | 3800.0 | 190.0 | 2019 | 381 | 382.4915 | 249.32 |
| 3506 | OnePlus | Android | 23.97 | yes | no | NaN | 16.0 | 128.0 | 8.0 | 3800.0 | 190.0 | 2019 | 476 | 382.4915 | 191.21 |
| 3389 | Sony | Android | 15.40 | yes | no | NaN | 8.0 | 128.0 | 6.0 | 3140.0 | 164.0 | 2019 | 348 | 392.6915 | 254.39 |
| 3374 | Realme | Android | 22.22 | yes | yes | NaN | 8.0 | 128.0 | 6.0 | 4200.0 | 209.0 | 2020 | 148 | 395.2415 | 298.10 |
| 3517 | Realme | Android | 22.22 | yes | yes | NaN | 8.0 | 128.0 | 6.0 | 4200.0 | 209.0 | 2020 | 219 | 395.2415 | 297.12 |
| 131 | Oppo | Android | 15.88 | yes | yes | NaN | 32.0 | 128.0 | 8.0 | 4025.0 | 180.0 | 2020 | 146 | 398.7700 | 297.87 |
| 356 | Oppo | Android | 15.88 | yes | yes | NaN | 32.0 | 128.0 | 8.0 | 4025.0 | 180.0 | 2020 | 218 | 399.5500 | 299.98 |
| 3416 | Xiaomi | Android | 25.88 | yes | yes | NaN | 16.0 | 128.0 | 6.0 | 4700.0 | 218.0 | 2020 | 306 | 413.0915 | 270.33 |
| 104 | Meizu | Android | 15.56 | yes | no | NaN | 20.0 | 128.0 | 6.0 | 3600.0 | 165.0 | 2019 | 332 | 420.1700 | 272.02 |
| 329 | Meizu | Android | 15.56 | yes | no | NaN | 20.0 | 128.0 | 6.0 | 3600.0 | 165.0 | 2019 | 226 | 421.1200 | 318.15 |
| 2780 | Sony | Android | 16.03 | yes | no | NaN | 8.0 | 64.0 | 4.0 | 3000.0 | 180.0 | 2019 | 252 | 429.1000 | 295.97 |
| 126 | Oppo | Android | 23.97 | yes | yes | NaN | 32.0 | 128.0 | 8.0 | 4000.0 | 172.0 | 2020 | 187 | 470.3400 | 356.01 |
| 351 | Oppo | Android | 23.97 | yes | yes | NaN | 32.0 | 128.0 | 8.0 | 4000.0 | 172.0 | 2020 | 293 | 470.8100 | 335.09 |
| 3421 | Xiaomi | Android | 25.88 | yes | yes | NaN | 20.0 | 128.0 | 8.0 | 4780.0 | 208.0 | 2020 | 321 | 472.5915 | 308.77 |
| 3521 | Realme | Android | 22.22 | yes | yes | NaN | 8.0 | 128.0 | 6.0 | 4200.0 | 205.0 | 2020 | 141 | 480.7430 | 359.14 |
| 3378 | Realme | Android | 22.22 | yes | yes | NaN | 8.0 | 128.0 | 6.0 | 4200.0 | 205.0 | 2020 | 257 | 480.7430 | 342.84 |
| 3399 | Vivo | Android | 22.22 | yes | yes | NaN | 16.0 | 128.0 | 6.0 | 4400.0 | 214.5 | 2020 | 154 | 488.6650 | 363.41 |
| 3441 | ZTE | Android | 25.56 | yes | yes | NaN | 8.0 | 128.0 | 8.0 | 4500.0 | 218.0 | 2020 | 133 | 492.1500 | 369.60 |
| 2205 | OnePlus | Android | 13.49 | yes | no | NaN | 16.0 | 128.0 | 4.0 | 3300.0 | 153.0 | 2017 | 875 | 499.2100 | 149.69 |
| 2204 | OnePlus | Android | 15.40 | yes | no | NaN | 16.0 | 128.0 | 4.0 | 3300.0 | 162.0 | 2017 | 623 | 500.1800 | 199.99 |
| 3419 | Xiaomi | Android | 25.88 | yes | yes | NaN | 20.0 | 128.0 | 8.0 | 4720.0 | 222.0 | 2020 | 164 | 500.6500 | 377.16 |
| 864 | BlackBerry | Android | 10.95 | yes | no | NaN | 8.0 | 64.0 | 4.0 | 3500.0 | 168.0 | 2018 | 471 | 501.4200 | 250.66 |
| 3507 | Oppo | Android | 16.03 | yes | yes | NaN | 32.0 | 256.0 | 12.0 | 4025.0 | 171.0 | 2020 | 348 | 509.1500 | 328.72 |
| 3440 | ZTE | Android | 22.70 | yes | yes | NaN | 20.0 | 128.0 | 6.0 | 4000.0 | 168.0 | 2020 | 217 | 509.1500 | 379.80 |
| 3364 | Oppo | Android | 16.03 | yes | yes | NaN | 32.0 | 256.0 | 12.0 | 4025.0 | 171.0 | 2020 | 227 | 509.1500 | 380.74 |
| 3415 | Xiaomi | Android | 25.88 | yes | yes | NaN | 16.0 | 128.0 | 8.0 | 4700.0 | 218.0 | 2020 | 194 | 518.4915 | 386.79 |
| 2203 | OnePlus | Android | 19.69 | yes | no | NaN | 16.0 | 128.0 | 4.0 | 3300.0 | 177.0 | 2018 | 386 | 519.3800 | 336.78 |
| 129 | Oppo | Android | 23.97 | yes | yes | NaN | 16.0 | 128.0 | 8.0 | 4000.0 | 186.0 | 2020 | 186 | 519.8200 | 391.59 |
| 354 | Oppo | Android | 23.97 | yes | yes | NaN | 16.0 | 128.0 | 8.0 | 4000.0 | 186.0 | 2020 | 200 | 520.1600 | 388.72 |
| 324 | Meizu | Android | 16.19 | yes | yes | NaN | 20.0 | 128.0 | 8.0 | 4500.0 | 199.0 | 2020 | 318 | 529.2900 | 344.66 |
| 99 | Meizu | Android | 16.19 | yes | yes | NaN | 20.0 | 128.0 | 8.0 | 4500.0 | 199.0 | 2020 | 186 | 529.5500 | 398.56 |
| 3394 | Vivo | Android | 24.13 | yes | yes | NaN | 32.0 | 128.0 | 8.0 | 4315.0 | 181.5 | 2020 | 165 | 545.5725 | 407.09 |
| 2202 | OnePlus | Android | 21.75 | yes | no | NaN | 16.0 | 256.0 | 4.0 | 3700.0 | 185.0 | 2018 | 496 | 549.7400 | 274.99 |
| 3362 | OnePlus | Android | 25.88 | yes | no | NaN | 16.0 | 256.0 | 8.0 | 4085.0 | 206.0 | 2019 | 341 | 551.6075 | 358.34 |
| 3505 | OnePlus | Android | 25.88 | yes | no | NaN | 16.0 | 256.0 | 8.0 | 4085.0 | 206.0 | 2019 | 367 | 551.6075 | 359.07 |
| 210 | ZTE | Android | 25.56 | yes | yes | NaN | 12.0 | 256.0 | 8.0 | 5100.0 | 215.0 | 2020 | 235 | 598.6600 | 448.34 |
| 3480 | Meizu | Android | 16.19 | yes | yes | NaN | 20.0 | 128.0 | 8.0 | 4500.0 | 219.0 | 2020 | 139 | 600.0915 | 451.07 |
| 3337 | Meizu | Android | 16.19 | yes | yes | NaN | 20.0 | 128.0 | 8.0 | 4500.0 | 219.0 | 2020 | 261 | 600.0915 | 412.08 |
| 161 | Vivo | Android | 24.13 | yes | yes | NaN | 32.0 | 128.0 | 8.0 | 4315.0 | 181.5 | 2020 | 145 | 630.2500 | 470.69 |
| 165 | Vivo | Android | 29.37 | yes | yes | NaN | 16.0 | 256.0 | 8.0 | 4500.0 | 219.5 | 2020 | 226 | 630.9900 | 472.89 |
| 123 | OnePlus | Android | 23.97 | yes | yes | NaN | 16.0 | 128.0 | 8.0 | 4300.0 | 180.0 | 2020 | 208 | 648.1400 | 490.04 |
| 348 | OnePlus | Android | 23.97 | yes | yes | NaN | 16.0 | 128.0 | 8.0 | 4300.0 | 180.0 | 2020 | 232 | 651.0100 | 489.18 |
| 3566 | Asus | Android | 15.88 | yes | no | NaN | 8.0 | 64.0 | 6.0 | 5000.0 | 190.0 | 2019 | 232 | 654.5000 | 490.96 |
| 3418 | Xiaomi | Android | 17.94 | yes | yes | NaN | 20.0 | 256.0 | 8.0 | 5000.0 | 253.0 | 2020 | 361 | 671.4915 | 436.77 |
| 2201 | OnePlus | Android | 21.75 | yes | no | NaN | 16.0 | 256.0 | 4.0 | 3700.0 | 185.0 | 2018 | 489 | 700.9400 | 350.47 |
| 3420 | Xiaomi | Android | 25.88 | yes | yes | NaN | 20.0 | 256.0 | 8.0 | 4500.0 | 208.0 | 2020 | 112 | 713.9915 | 535.92 |
| 347 | OnePlus | Android | 23.97 | yes | yes | NaN | 16.0 | 128.0 | 8.0 | 4300.0 | 180.0 | 2020 | 186 | 738.8200 | 553.77 |
| 122 | OnePlus | Android | 23.97 | yes | yes | NaN | 16.0 | 128.0 | 8.0 | 4300.0 | 180.0 | 2020 | 110 | 741.9700 | 559.03 |
| 350 | OnePlus | Android | 25.88 | yes | yes | NaN | 16.0 | 128.0 | 6.0 | 4000.0 | 206.0 | 2019 | 523 | 799.8000 | 400.00 |
| 349 | OnePlus | Android | 25.88 | yes | yes | NaN | 16.0 | 256.0 | 12.0 | 4085.0 | 206.0 | 2019 | 280 | 800.9200 | 553.59 |
| 124 | OnePlus | Android | 25.88 | yes | yes | NaN | 16.0 | 256.0 | 12.0 | 4085.0 | 206.0 | 2019 | 458 | 801.2400 | 400.43 |
| 125 | OnePlus | Android | 25.88 | yes | yes | NaN | 16.0 | 128.0 | 6.0 | 4000.0 | 206.0 | 2019 | 294 | 801.6300 | 568.56 |
| 3367 | Oppo | Android | 16.35 | yes | yes | NaN | 32.0 | 128.0 | 8.0 | 4200.0 | 209.0 | 2020 | 91 | 824.4915 | 613.87 |
| 3510 | Oppo | Android | 16.35 | yes | yes | NaN | 32.0 | 128.0 | 8.0 | 4200.0 | 209.0 | 2020 | 261 | 824.4915 | 566.32 |
| 3387 | Sony | Android | 16.03 | yes | yes | NaN | 8.0 | 256.0 | 8.0 | 4000.0 | 181.4 | 2020 | 236 | 934.1500 | 698.61 |
| 3509 | Oppo | Android | 16.35 | yes | yes | NaN | 32.0 | 256.0 | 12.0 | 4260.0 | 217.0 | 2020 | 159 | 934.9915 | 702.85 |
| 3366 | Oppo | Android | 16.35 | yes | yes | NaN | 32.0 | 256.0 | 12.0 | 4260.0 | 217.0 | 2020 | 193 | 934.9915 | 699.64 |
| 2779 | Sony | Android | 16.03 | yes | no | NaN | 8.0 | 64.0 | 4.0 | 3330.0 | 180.0 | 2019 | 328 | 999.7400 | 650.33 |
| 3565 | Asus | Android | 24.61 | yes | no | NaN | 24.0 | 128.0 | 8.0 | 6000.0 | 240.0 | 2019 | 325 | 1163.6500 | 756.99 |
| 336 | Motorola | Android | 16.35 | yes | yes | NaN | 25.0 | 256.0 | 12.0 | 5000.0 | 203.0 | 2020 | 188 | 1197.4900 | 894.04 |
| 111 | Motorola | Android | 16.35 | yes | yes | NaN | 25.0 | 256.0 | 12.0 | 5000.0 | 203.0 | 2020 | 220 | 1200.8500 | 901.27 |
df[df['main_camera_mp'].isnull()].groupby(['release_year','brand_name'])['release_year'].count()
# Phones with missing main_camera_mp values are mostly newer models
release_year brand_name
2017 OnePlus 2
2018 BlackBerry 4
Coolpad 1
OnePlus 3
Panasonic 2
2019 Asus 4
Coolpad 1
Infinix 4
Meizu 11
OnePlus 8
Realme 12
Sony 5
2020 Coolpad 1
Infinix 6
Lava 2
Meizu 4
Motorola 18
OnePlus 4
Oppo 20
Realme 24
Sony 2
Vivo 14
Xiaomi 24
ZTE 4
Name: release_year, dtype: int64
# What's the average main_camera_mp per brand in 2020?
print(df.groupby(['release_year'])['main_camera_mp','selfie_camera_mp','new_price'].median())
df[df['release_year']>2017].groupby(['release_year','brand_name'])['main_camera_mp','selfie_camera_mp','new_price'].max()
# 2020 Oppo main cameras, upon research: 12MP, 48MP, 13MP, 48MP, 48MP
# If it's from 2020 and the new price > 500, then main_mp = 48mp
# If it's from 2019 and the new price > 900, then main_mp = 48mp
# If new price < 100, then main_mp = 1
# If new price < 200, then main_mp = 8
# ELSE main_mp = 12
main_camera_mp selfie_camera_mp new_price release_year 2013 5.0 1.0 141.12000 2014 8.0 2.0 169.94500 2015 8.0 5.0 171.31000 2016 13.0 5.0 198.19000 2017 13.0 8.0 200.97000 2018 13.0 8.0 250.05000 2019 12.0 16.0 237.99150 2020 8.0 13.0 237.14675
| main_camera_mp | selfie_camera_mp | new_price | ||
|---|---|---|---|---|
| release_year | brand_name | |||
| 2018 | Acer | 5.0 | 2.0 | 328.3200 |
| Alcatel | 13.0 | 8.0 | 230.4500 | |
| Apple | 12.0 | 7.0 | 1250.3200 | |
| Asus | 13.0 | 8.0 | 949.2700 | |
| BlackBerry | NaN | 16.0 | 501.4200 | |
| Coolpad | NaN | 8.0 | 149.4700 | |
| Gionee | 8.0 | 5.0 | 131.6300 | |
| 12.2 | NaN | 959.0800 | ||
| HTC | 13.0 | 13.0 | 650.8800 | |
| Honor | 13.0 | 25.0 | 570.6800 | |
| Huawei | 13.0 | 25.0 | 1600.2900 | |
| LG | 16.0 | 8.0 | 802.0100 | |
| Lenovo | 13.0 | 16.0 | 400.4400 | |
| Meizu | 16.0 | 20.0 | 481.4200 | |
| Motorola | 13.0 | 12.0 | 399.5400 | |
| Nokia | 16.0 | 20.0 | 549.8200 | |
| OnePlus | NaN | 16.0 | 700.9400 | |
| Oppo | 16.0 | 25.0 | 1352.4200 | |
| Others | 16.0 | 20.0 | 899.9500 | |
| Panasonic | NaN | 16.0 | 349.4000 | |
| Realme | 13.0 | 25.0 | 210.6000 | |
| Samsung | 16.0 | 24.0 | 748.9900 | |
| Sony | 23.0 | 13.0 | 898.4000 | |
| Vivo | 13.0 | 25.0 | 700.7800 | |
| Xiaomi | 13.0 | 24.0 | 559.3300 | |
| ZTE | 24.0 | 20.0 | 650.7800 | |
| 2019 | Alcatel | 5.0 | 8.0 | 150.8100 |
| Apple | 8.0 | 7.0 | 849.1700 | |
| Asus | NaN | 24.0 | 1163.6500 | |
| Coolpad | NaN | 13.0 | 161.6100 | |
| 12.2 | 8.0 | 577.1500 | ||
| HTC | 13.0 | 16.0 | 419.8800 | |
| Honor | 13.0 | 32.0 | 501.7600 | |
| Huawei | 13.0 | 32.0 | 2300.8700 | |
| Infinix | NaN | 16.0 | 130.4900 | |
| LG | 16.0 | 32.0 | 1100.3100 | |
| Lenovo | 13.0 | 32.0 | 402.0700 | |
| Meizu | NaN | 20.0 | 421.1200 | |
| Motorola | 48.0 | 25.0 | 298.1700 | |
| Nokia | 13.0 | 20.0 | 619.7000 | |
| OnePlus | NaN | 16.0 | 801.6300 | |
| Oppo | 8.0 | 32.0 | 880.8000 | |
| Others | 22.6 | 24.0 | 900.8700 | |
| Realme | NaN | 25.0 | 365.4150 | |
| Samsung | 16.0 | 32.0 | 1751.1800 | |
| Sony | NaN | 8.0 | 999.7400 | |
| Vivo | 13.0 | 32.0 | 731.3300 | |
| Xiaomi | 13.0 | 32.0 | 2498.2400 | |
| ZTE | 48.0 | 20.0 | 819.5700 | |
| 2020 | Alcatel | 13.0 | 5.0 | 101.9150 |
| Apple | 12.0 | 7.0 | 999.5800 | |
| Coolpad | NaN | 16.0 | 359.7700 | |
| Honor | 13.0 | 32.0 | 634.0915 | |
| Huawei | 13.0 | 32.0 | 2560.2000 | |
| Infinix | NaN | 8.0 | 111.2500 | |
| LG | 8.0 | 16.0 | 870.0100 | |
| Lava | NaN | 5.0 | 79.9400 | |
| Lenovo | 8.0 | 5.0 | 181.5200 | |
| Meizu | NaN | 20.0 | 600.0915 | |
| Motorola | NaN | 25.0 | 1200.8500 | |
| Nokia | 8.0 | 24.0 | 501.1200 | |
| OnePlus | NaN | 16.0 | 741.9700 | |
| Oppo | NaN | 32.0 | 934.9915 | |
| Others | 13.0 | 24.0 | 670.6800 | |
| Realme | NaN | 16.0 | 480.7430 | |
| Samsung | 8.0 | 32.0 | 1054.4505 | |
| Sony | NaN | 8.0 | 934.1500 | |
| Vivo | NaN | 32.0 | 630.9900 | |
| Xiaomi | NaN | 32.0 | 713.9915 | |
| ZTE | NaN | 20.0 | 598.6600 |
# Which phones have cameras above 30MP?
df.loc[(df['main_camera_mp']>40) | (df['selfie_camera_mp']>40)].sort_values(by='release_year')
# We can believe that newer phones might have smaller cameras, but it's
# hard to believe that older phones have larger cameras.
# The top one from 2013 looks suspicious, but I looked it up and it's factual https://en.wikipedia.org/wiki/Nokia_Lumia_1020
| brand_name | os | screen_size | 4g | 5g | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2186 | Nokia | Windows | 10.95 | yes | no | 41.0 | 1.2 | 32.0 | 4.0 | 2000.0 | 158.0 | 2013 | 988 | 399.1300 | 119.75 |
| 216 | ZTE | Android | 25.56 | yes | no | 48.0 | 16.0 | 128.0 | 8.0 | 5000.0 | 215.0 | 2019 | 336 | 819.5700 | 533.34 |
| 221 | ZTE | Android | 25.56 | yes | no | 48.0 | 16.0 | 64.0 | 6.0 | 5000.0 | 215.0 | 2019 | 506 | 501.5600 | 250.83 |
| 3354 | Motorola | Android | 15.88 | yes | no | 48.0 | 25.0 | 128.0 | 4.0 | 3600.0 | 165.0 | 2019 | 371 | 254.9915 | 164.69 |
| 3497 | Motorola | Android | 15.88 | yes | no | 48.0 | 25.0 | 128.0 | 4.0 | 3600.0 | 165.0 | 2019 | 422 | 254.9915 | 127.72 |
df['int_memory'].fillna(32.0, inplace=True)
df['ram'].fillna(4.0, inplace=True)
df.iloc[3113:3120,10].fillna(125.0, inplace=True) # all weights for Nokia
df.iloc[1131:1133,6].fillna(8.0, inplace=True) # selfie_camera_mp for Google phones
df.iloc[1901,9]=3140 # Meizu battery
df.iloc[1903,9]=3350 # Meizu battery
df.iloc[1904,9]=3100 # Meizu battery
# If it's from 2020 and the new price > 500, then main_mp = 48mp
# If it's from 2019 and the new price > 900, then main_mp = 20mp
# If new price < 100, then main_mp = 1
# If new price < 200, then main_mp = 6
# ELSE main_mp = 12
df.loc[(df['release_year']==2020) & (df['new_price']>500),'main_camera_mp'].fillna(48, inplace=True)
df.loc[(df['release_year']==2019) & (df['new_price']>900), 'main_camera_mp'].fillna(20, inplace=True)
df.loc[df['new_price']<100,'main_camera_mp'].fillna(1, inplace=True)
df.loc[df['new_price']<200,'main_camera_mp'].fillna(6, inplace=True)
df['main_camera_mp'].fillna(12, inplace=True)
# drop the remaining 3 rows with missing values
df = df.dropna()
# Battery Outlier? Drop it
print(df[df['battery']>10000])
df.drop(387,inplace=True)
print(df.isnull().sum())
print(df.shape)
print(df.info())
brand_name os screen_size 4g 5g main_camera_mp \
387 Samsung Android 43.66 yes no 12.0
selfie_camera_mp int_memory ram battery weight release_year \
387 5.0 64.0 3.0 12000.0 23.0 2019
days_used new_price used_price
387 266 659.47 460.58
brand_name 0
os 0
screen_size 0
4g 0
5g 0
main_camera_mp 0
selfie_camera_mp 0
int_memory 0
ram 0
battery 0
weight 0
release_year 0
days_used 0
new_price 0
used_price 0
dtype: int64
(3567, 15)
<class 'pandas.core.frame.DataFrame'>
Int64Index: 3567 entries, 0 to 3570
Data columns (total 15 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 brand_name 3567 non-null object
1 os 3567 non-null object
2 screen_size 3567 non-null float64
3 4g 3567 non-null object
4 5g 3567 non-null object
5 main_camera_mp 3567 non-null float64
6 selfie_camera_mp 3567 non-null float64
7 int_memory 3567 non-null float64
8 ram 3567 non-null float64
9 battery 3567 non-null float64
10 weight 3567 non-null float64
11 release_year 3567 non-null int64
12 days_used 3567 non-null int64
13 new_price 3567 non-null float64
14 used_price 3567 non-null float64
dtypes: float64(9), int64(2), object(4)
memory usage: 445.9+ KB
None
# We already dropped the most extreme outlier.
# Now let's try dropping other outliers
# We don't want ram to be affected, so we'll change it to a category and then back to float
df['ram'] = df['ram'].astype('category')
def treat_outliers(df, col):
"""
treats outliers in a variable
col: str, name of the numerical variable
df: dataframe
col: name of the column
"""
Q1 = df[col].quantile(0.25) # 25th quantile
Q3 = df[col].quantile(0.75) # 75th quantile
IQR = Q3 - Q1
Lower_Whisker = Q1 - 2 * IQR
Upper_Whisker = Q3 + 2 * IQR
# all the values smaller than Lower_Whisker will be assigned the value of Lower_Whisker
# all the values greater than Upper_Whisker will be assigned the value of Upper_Whisker
df[col] = np.clip(df[col], Lower_Whisker, Upper_Whisker)
return df
def treat_outliers_all(df, col_list):
"""
treat outlier in all numerical variables
col_list: list of numerical variables
df: data frame
"""
for c in col_list:
df = treat_outliers(df, c)
return df
# treating the outliers
numerical_col = df.select_dtypes(include=np.number).columns.tolist()
df = treat_outliers_all(df, numerical_col)
df.shape
df['ram'] = df['ram'].astype('float64')
# Now to change 4g and 5g to 1s and 0s............
df['4g'] = df['4g'].map(dict(yes=1, no=0))
df['5g'] = df['5g'].map(dict(yes=1, no=0))
df["4g"] = df["4g"].astype('int64')
df["5g"] = df["5g"].astype('int64')
df[['4g','5g']].info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 3567 entries, 0 to 3570 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 4g 3567 non-null int64 1 5g 3567 non-null int64 dtypes: int64(2) memory usage: 83.6 KB
plt.figure(figsize = (25,12))
sns.heatmap(df.corr(), annot=True, vmin=-1, vmax=1, cmap='Spectral');
print(abs(df.corr())['used_price'].sort_values(ascending=False))
used_price 1.000000 new_price 0.910208 selfie_camera_mp 0.633421 int_memory 0.583849 days_used 0.551348 release_year 0.537556 ram 0.505263 screen_size 0.490430 5g 0.473231 battery 0.465321 4g 0.453054 main_camera_mp 0.422298 weight 0.311568 Name: used_price, dtype: float64
for i in df.columns:
sns.histplot(data=df, x=i, kde=True)
plt.show()
# plt.figure(figsize=(25,12))
# sns.pairplot(data=df)
# We will remove brand_name and OS
df.drop(['brand_name'], axis=1, inplace=True)
df.drop(['os'], axis=1, inplace=True)
df.head()
| screen_size | 4g | 5g | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 23.89 | 1 | 0 | 13.0 | 5.0 | 64.0 | 3.0 | 3020.0 | 146.0 | 2020 | 127 | 111.62 | 86.96 |
| 1 | 23.89 | 1 | 1 | 13.0 | 16.0 | 128.0 | 8.0 | 4300.0 | 213.0 | 2020 | 325 | 249.39 | 161.49 |
| 2 | 23.89 | 1 | 1 | 13.0 | 8.0 | 128.0 | 8.0 | 4200.0 | 213.0 | 2020 | 162 | 359.47 | 268.55 |
| 3 | 23.89 | 1 | 1 | 13.0 | 8.0 | 64.0 | 6.0 | 7250.0 | 272.0 | 2020 | 345 | 278.93 | 180.23 |
| 4 | 15.72 | 1 | 0 | 13.0 | 8.0 | 64.0 | 3.0 | 5000.0 | 185.0 | 2020 | 293 | 140.87 | 103.80 |
# Then we split the data, build the model, and test it
X = df.drop(['used_price'], axis=1)
y = df['used_price'].copy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=55)
regression_model = LinearRegression()
regression_model.fit(X_train, y_train)
print(regression_model.score(X_train, y_train))
print(regression_model.score(X_test, y_test))
0.953073372668335 0.9578846114455567
# This model captures 95% of the test data!!!
# NOTE: We tested this model without the IQR Outlier treatment as well, and the model was still 95%,
# so we are keeping the outlier treatment
# Let's check the Adjusted R Squared and more
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# function to compute adjusted R-squared
def adj_r2_score(predictors, targets, predictions):
r2 = r2_score(targets, predictions)
n = predictors.shape[0]
k = predictors.shape[1]
return 1 - ((1 - r2) * (n - 1) / (n - k - 1))
# function to compute MAPE
def mape_score(targets, predictions):
return np.mean(np.abs(targets - predictions) / targets) * 100
# function to compute different metrics to check performance of a regression model
def model_performance_regression(model, predictors, target):
"""
Function to compute different metrics to check regression model performance
model: regressor
predictors: independent variables
target: dependent variable
"""
# predicting using the independent variables
pred = model.predict(predictors)
r2 = r2_score(target, pred) # to compute R-squared
adjr2 = adj_r2_score(predictors, target, pred) # to compute adjusted R-squared
rmse = np.sqrt(mean_squared_error(target, pred)) # to compute RMSE
mae = mean_absolute_error(target, pred) # to compute MAE
mape = mape_score(target, pred) # to compute MAPE
# creating a dataframe of metrics
df_perf = pd.DataFrame(
{
"RMSE": rmse,
"MAE": mae,
"R-squared": r2,
"Adj. R-squared": adjr2,
"MAPE": mape,
},
index=[0],
)
return df_perf
# checking model performance on train set (seen 70% data)
print("Training Performance\n")
linearregression_train_perf = model_performance_regression(
regression_model, X_train, y_train
)
print(linearregression_train_perf)
print()
# checking model performance on test set (seen 30% data)
print("Test Performance\n")
linearregression_test_perf = model_performance_regression(
regression_model, X_test, y_test
)
print(linearregression_test_perf)
Training Performance
RMSE MAE R-squared Adj. R-squared MAPE
0 15.662613 11.18816 0.953073 0.952847 19.54726
Test Performance
RMSE MAE R-squared Adj. R-squared MAPE
0 15.300164 10.861353 0.957885 0.957407 17.898905
from statsmodels.stats.outliers_influence import variance_inflation_factor
# we will define a function to check VIF
def checking_vif(predictors):
vif = pd.DataFrame()
vif["feature"] = predictors.columns
# calculating VIF for each feature
vif["VIF"] = [
variance_inflation_factor(predictors.values, i)
for i in range(len(predictors.columns))
]
return vif
checking_vif(X_train)
| feature | VIF | |
|---|---|---|
| 0 | screen_size | 32.442367 |
| 1 | 4g | 5.520042 |
| 2 | 5g | 1.690646 |
| 3 | main_camera_mp | 9.938121 |
| 4 | selfie_camera_mp | 6.041559 |
| 5 | int_memory | 4.053550 |
| 6 | ram | 15.128273 |
| 7 | battery | 22.075739 |
| 8 | weight | 40.634631 |
| 9 | release_year | 50.048518 |
| 10 | days_used | 14.357589 |
| 11 | new_price | 6.458663 |
There are up to 10 features with a VIF over 5. The largest is release_year. So let's drop them one by one and check the Adjusted R Squared on the training data
y_train.head()
1472 68.37 127 260.19 907 44.88 2200 92.35 1216 74.99 Name: used_price, dtype: float64
def dropRetest(cols):
for i in cols:
X_train_1 = X_train.drop([i], axis=1)
regression_model_1 = LinearRegression()
regression_model_1.fit(X_train_1, y_train)
linearregression_train_perf_1 = model_performance_regression(regression_model_1, X_train_1, y_train)
print('Removing '+i)
print(linearregression_train_perf_1)
print()
cols=['release_year','battery','screen_size','ram','weight','days_used','main_camera_mp']
dropRetest(cols)
Removing release_year
RMSE MAE R-squared Adj. R-squared MAPE
0 15.666531 11.185398 0.95305 0.952842 19.513303
Removing battery
RMSE MAE R-squared Adj. R-squared MAPE
0 15.662936 11.189263 0.953071 0.952864 19.548804
Removing screen_size
RMSE MAE R-squared Adj. R-squared MAPE
0 15.663547 11.18734 0.953068 0.95286 19.548575
Removing ram
RMSE MAE R-squared Adj. R-squared MAPE
0 15.724886 11.246621 0.952699 0.95249 19.715815
Removing weight
RMSE MAE R-squared Adj. R-squared MAPE
0 15.664573 11.188646 0.953062 0.952854 19.550897
Removing days_used
RMSE MAE R-squared Adj. R-squared MAPE
0 21.279293 15.961124 0.913383 0.912999 21.861876
Removing main_camera_mp
RMSE MAE R-squared Adj. R-squared MAPE
0 15.687998 11.216255 0.952921 0.952713 19.649139
We don't want to remove days_used because it hurt the Adj R-squared the most. Instead we will remove battery and then weight and check the VIFs
X_train_2 = X_train.drop(['battery'], axis=1)
checking_vif(X_train_2)
| feature | VIF | |
|---|---|---|
| 0 | screen_size | 29.066443 |
| 1 | 4g | 5.284076 |
| 2 | 5g | 1.688539 |
| 3 | main_camera_mp | 9.876165 |
| 4 | selfie_camera_mp | 6.035672 |
| 5 | int_memory | 4.052941 |
| 6 | ram | 15.127731 |
| 7 | weight | 29.104788 |
| 8 | release_year | 48.779558 |
| 9 | days_used | 14.232892 |
| 10 | new_price | 6.448631 |
X_train_2 = X_train.drop(['weight'], axis=1)
checking_vif(X_train_2)
| feature | VIF | |
|---|---|---|
| 0 | screen_size | 27.857051 |
| 1 | 4g | 5.497355 |
| 2 | 5g | 1.690634 |
| 3 | main_camera_mp | 9.654145 |
| 4 | selfie_camera_mp | 5.964633 |
| 5 | int_memory | 4.053125 |
| 6 | ram | 15.125880 |
| 7 | battery | 15.811876 |
| 8 | release_year | 45.706727 |
| 9 | days_used | 14.187948 |
| 10 | new_price | 6.406424 |
Battery and Weight are related, but removing them didn't change the VIF of any of the other features, so let's check the model score on the TRAINING set by removing them both
X_train_2 = X_train.drop(['battery','weight'], axis=1)
regression_model_2 = LinearRegression()
regression_model_2.fit(X_train_2, y_train)
linearregression_train_perf_2 = model_performance_regression(regression_model_2, X_train_2, y_train)
print('Removing both battery and weight')
print(linearregression_train_perf_2)
print()
Removing both battery and weight
RMSE MAE R-squared Adj. R-squared MAPE
0 15.664621 11.188262 0.953061 0.952872 19.550816
# Let's check the TEST set by removing weight and battery too
X_test_2 = X_test.drop(['battery','weight'], axis=1)
linearregression_test_perf_2 = model_performance_regression(regression_model_2, X_test_2, y_test)
print('Removing both battery and weight')
print(linearregression_test_perf_2)
print()
Removing both battery and weight
RMSE MAE R-squared Adj. R-squared MAPE
0 15.292189 10.85801 0.957929 0.957532 17.882971
Removing both battery and weight resulted in better scores. Moving on.
import statsmodels.api as sm
# unlike sklearn, statsmodels does not add a constant to the data on its own
# we have to add the constant manually
x_train1 = sm.add_constant(X_train_2)
# adding constant to the test data
x_test1 = sm.add_constant(X_test_2)
olsmod2 = sm.OLS(y_train, x_train1).fit()
# let us create a dataframe with actual, fitted and residual values
df_pred = pd.DataFrame()
df_pred["Actual Values"] = y_train # actual values
df_pred["Fitted Values"] = olsmod2.fittedvalues # predicted values
df_pred["Residuals"] = olsmod2.resid # residuals
df_pred.head()
| Actual Values | Fitted Values | Residuals | |
|---|---|---|---|
| 1472 | 68.37 | 68.202828 | 0.167172 |
| 127 | 260.19 | 219.193721 | 40.996279 |
| 907 | 44.88 | 38.672646 | 6.207354 |
| 2200 | 92.35 | 96.114753 | -3.764753 |
| 1216 | 74.99 | 95.425720 | -20.435720 |
# let's plot the fitted values vs residuals
sns.residplot(
data=df_pred, x="Fitted Values", y="Residuals", color="purple", lowess=True
)
plt.xlabel("Fitted Values")
plt.ylabel("Residuals")
plt.title("Fitted vs Residual plot")
plt.show()
sns.histplot(data=df_pred, x="Residuals", kde=True)
plt.title("Normality of residuals")
plt.show()
# There is a bell curve
import pylab
import scipy.stats as stats
stats.probplot(df_pred["Residuals"], dist="norm", plot=pylab)
plt.show()
stats.shapiro(df_pred["Residuals"])
ShapiroResult(statistic=0.9476568102836609, pvalue=6.163355591350064e-29)
import statsmodels.stats.api as sms
from statsmodels.compat import lzip
name = ["F statistic", "p-value"]
test = sms.het_goldfeldquandt(df_pred["Residuals"], X_train_2)
lzip(name, test)
[('F statistic', 1.0199143827807882), ('p-value', 0.364359396111027)]
Since p-value > 0.05, we can say that the residuals are homoscedastic. So, this assumption is satisfied.
olsmodel_final = sm.OLS(y_test, x_test1).fit()
print(olsmodel_final.summary())
OLS Regression Results
==============================================================================
Dep. Variable: used_price R-squared: 0.959
Model: OLS Adj. R-squared: 0.958
Method: Least Squares F-statistic: 2464.
Date: Fri, 22 Oct 2021 Prob (F-statistic): 0.00
Time: 23:07:34 Log-Likelihood: -4430.0
No. Observations: 1071 AIC: 8882.
Df Residuals: 1060 BIC: 8937.
Df Model: 10
Covariance Type: nonrobust
====================================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------------
const -1574.0019 885.697 -1.777 0.076 -3311.921 163.917
screen_size 0.4316 0.126 3.415 0.001 0.184 0.680
4g -3.3511 1.482 -2.261 0.024 -6.260 -0.443
5g 5.3879 3.022 1.783 0.075 -0.542 11.317
main_camera_mp -0.4349 0.141 -3.095 0.002 -0.711 -0.159
selfie_camera_mp 0.5590 0.147 3.794 0.000 0.270 0.848
int_memory 0.0821 0.016 5.175 0.000 0.051 0.113
ram 1.2128 0.469 2.584 0.010 0.292 2.134
release_year 0.8083 0.439 1.840 0.066 -0.054 1.670
days_used -0.0854 0.003 -28.509 0.000 -0.091 -0.080
new_price 0.3901 0.004 87.414 0.000 0.381 0.399
==============================================================================
Omnibus: 148.058 Durbin-Watson: 1.881
Prob(Omnibus): 0.000 Jarque-Bera (JB): 474.956
Skew: 0.672 Prob(JB): 7.32e-104
Kurtosis: 5.973 Cond. No. 4.07e+06
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 4.07e+06. This might indicate that there are
strong multicollinearity or other numerical problems.